```r
# calculate entropy of data | True model
(ps <- dnorm(y, mean = mu, sd = sigma))
```

```
[1] 0.1972397 0.0219918 0.1209854 0.1841351
```

```r
-sum(ps * log(ps))
```

```
[1] 0.9712345
```
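The objects `x`, `y`, `mu`, and `sigma` come from an earlier chunk that is not shown here. A minimal sketch of what that setup might look like (the seed and parameter values below are assumptions for illustration, so this will not reproduce the exact numbers above):

```r
# Hypothetical setup (assumed, not from the original notes):
# four observations drawn from a "True" linear model.
set.seed(1)                                   # assumed seed
x <- 1:4
mu <- 5 + 1.5 * x                             # assumed true means
sigma <- 2                                    # assumed true sd
y <- rnorm(length(x), mean = mu, sd = sigma)
```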
Let’s fit two simple models.
m0
```r
library(rethinking)

# fit model with just a mean
m0 <- quap(
  alist(
    y ~ dnorm(mu, sigma),
    mu ~ dnorm(5, 3),
    sigma ~ dexp(1)
  ),
  data = data.frame(x, y)
)
precis(m0)
```

```
           mean        sd     5.5%     94.5%
mu    10.060778 1.7455243 7.271093 12.850463
sigma  3.761734 0.9382529 2.262224  5.261243
```
m1
```r
# fit model with a mean and slope
m1 <- quap(
  alist(
    y ~ dnorm(mu, sigma),
    mu ~ a + b * x,
    a ~ dnorm(5, 3),
    b ~ dnorm(0, 1),
    sigma ~ dexp(1)
  ),
  data = data.frame(x, y)
)
precis(m1)
```

```
          mean        sd      5.5%    94.5%
a     4.681411 1.4028117 2.4394467 6.923375
b     1.285321 0.2185176 0.9360877 1.634554
sigma 1.576968 0.4491855 0.8590828 2.294853
```
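The cross-entropy chunks below use `preds_m0` and `preds_m1`, which were computed in a chunk not shown here. A plausible reconstruction, assuming the rethinking package's `link()` (which returns a matrix of posterior draws of `mu`, one column per observation):

```r
# Assumed reconstruction: posterior-mean prediction per observation,
# averaging the posterior draws of mu over each column.
preds_m0 <- apply(link(m0), 2, mean)
preds_m1 <- apply(link(m1), 2, mean)
```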
Cross entropy from using m0 to approximate Truth
\[
H(p, q) = -\sum_{i=1}^n p_i \log(q_i)
\]
```r
## cross entropy
(qs <- dnorm(y,
             mean = preds_m0[1:4],  # probs if we use m0
             sd = mean(extract.samples(m0)$sigma)))
```

```
[1] 0.02717686 0.06561993 0.05212724 0.02751893
```
```r
-sum(ps * log(qs))
```

```
[1] 1.790003
```
```r
# added entropy by using m0 to approximate True
-sum(ps * log(qs)) - -sum(ps * log(ps))
```

```
[1] 0.8187685
```
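Equivalently, this is just the difference of the two quantities computed above:

\[
H(p, q) - H(p) = 1.790003 - 0.9712345 = 0.8187685
\]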
Cross entropy from using m1 to approximate Truth
\[
H(p, q) = -\sum_{i=1}^n p_i \log(q_i)
\]
```r
## cross entropy
(rs <- dnorm(y,
             mean = preds_m1[1:4],  # probs if we use m1
             sd = mean(extract.samples(m1)$sigma)))
```

```
[1] 0.09797833 0.06334920 0.21921167 0.18199660
```
```r
-sum(ps * log(rs))
```

```
[1] 1.016212
```
```r
# added entropy by using m1 to approximate True
-sum(ps * log(rs)) - -sum(ps * log(ps))
```

```
[1] 0.04497729
```
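Again, this is the difference of the two values above (up to rounding in the printed output):

\[
H(p, q) - H(p) = 1.016212 - 0.9712345 \approx 0.0450
\]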
Kullback-Leibler divergence
\[
D_{KL}(p,q) = \sum_{i=1}^n p_i\left[ \log(p_i) - \log(q_i) \right]
\]
It measures the added entropy from using a model \(q\) to approximate the truth \(p\).
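Expanding the sum shows why this is exactly the "added entropy" computed above:

\[
D_{KL}(p, q) = \sum_{i=1}^n p_i \log(p_i) - \sum_{i=1}^n p_i \log(q_i) = H(p, q) - H(p)
\]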
```r
# added entropy by using m0 to approximate True
-sum(ps * log(qs)) - -sum(ps * log(ps))
```

```
[1] 0.8187685
```
```r
## Dkl(p,q)
sum(ps * (log(ps) - log(qs)))
```

```
[1] 0.8187685
```

```r
## --> it's the same!

## added entropy by using m1 to approximate True
-sum(ps * log(rs)) - -sum(ps * log(ps))
```
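By the same identity, the KL divergence for m1 can be computed directly; this parallel check (added here for symmetry, not in the original notes) should reproduce the 0.04497729 obtained above:

```r
## Dkl(p,r): same identity, now for m1
sum(ps * (log(ps) - log(rs)))
```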